##  data analysis '53 paper 
##  main results
rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")


##  choose signal strength measure to employ
signal <- "lrm_4"

##  should municipalities be dropped/recoded to account for possible jamming?
##  jamming = km10, km12, km15; adjust = "drop", "min"
jamming <- ""
adjust <- ""

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))



##  histogram of signal strength
temp <- hist(dat$lrm_4, main = "", xlab = "RIAS signal strength (dBuV/m) (ITM)",
40, col = "cornflowerblue", cex.axis = 1.2, cex.lab = 1.2, xlim = c(40,110))
temp$density = temp$counts/sum(temp$counts)*100
plot(temp, freq = FALSE, main = "", xlab = "RIAS signal strength (dBuV/m) (ITM)",
col = "cornflowerblue", cex.axis = 1.2, cex.lab = 1.2, xlim = c(40,110),
ylab = "Percentage", ylim = c(0,10))



## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


table1 <- matrix(NA,45,8)
rownames(table1) <- c(	"intercept",
						"county capital",
						"KVP base",
						"$%$ agriculture and forestry",
						"$%$ mining",
						"$%$ metalworking industry",
						"$%$ chemical industry",
						"$%$ woods and plastics",
						"$%$ consumer goods",
						"$%$ construction",
						"$%$ transportation",
						"$%$ commerce",
						"$%$ services",
						"log(population size)",
						"$%$ population male",
						"population density",
						"$%$ population change 1939--50",
						"$%$ population in same {\\it Land}",
						"$%$ 8 years of education",
						"$%$ 10 years of education",
						"$%$ 12 years of education",
						"$%$ more than 12 years of education",
						"$%$ trade school degree",
						"$%$ university degree",
						"$%$ protestant",
						"$%$ catholic",
						"$%$ agnostic",
						"living space per capita (m$^2$)", 
						"$%$ turnout '32",
						"$%$ invalid votes '32",
						"$%$ NSDAP '32",
						"$%$ SPD '32",
						"$%$ KPD '32",
						"$%$ Zentrum '32",
						"$%$ DNVP '32",
						"$%$ DVP '32",
						"RIAS signal strength",
						"RIAS signal strength$^2$",
						"RIAS signal strength$^3$",
						"distance to East Berlin (100 km)",
						"distance to East Berlin$^2$ (100 km)",
						"distance to East Berlin$^3$ (100 km)",
						"Wald test RIAS $p$-value",
						"Wald test distance $p$-value",
						"district fixed effects")

table1[45,c(1,5)] <- "No"
table1[45,c(3,7)] <- "Yes"



#################################################
## probit model 1 --- RIAS only; no fixed effects
#################################################
source("hl.R")
probit.out <- glm(protest ~	
							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +
							signal + I(signal^2) + I(signal^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))

summary(probit.out)

##  clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:39,1:2] <- hl(temp[1:39,1:2])


##  Wald test
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,1] <- round(1 - pchisq(W, df = nrow(Q)),3)



##################################################
## Simulate effect of RIAS signal based on model 1
##################################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$signal), to = max(dat$signal), length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"signal"] <- d[m]
	X.pred[,"I(signal^2)"] <- d[m]^2
	X.pred[,"I(signal^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))

							}

	cat(m,"\n")

						}

out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot1.pdf")
plot(d, out2[1,], ylim = c(0,.5), type = "l", xlab = "RIAS signal strength (dBuV/m)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])), col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



################################################
### model 2 -- RIAS only; district fixed effects
################################################
probit.out <- glm(protest ~	
							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +
							signal + I(signal^2) + I(signal^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))

summary(probit.out)


##  clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:39,3:4] <- hl(temp[1:39,1:2])


##  Wald test
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

##  Compare to Chi^2 distribution to get p-value
table1[43,3] <- round(1 - pchisq(W, df = nrow(Q)),3)



##################################################
## Simulate effect of RIAS signal based on model 2
##################################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$signal), to = max(dat$signal), length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"signal"] <- d[m]
	X.pred[,"I(signal^2)"] <- d[m]^2
	X.pred[,"I(signal^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))

							}

	cat(m,"\n")

						}

out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot2.pdf")
plot(d, out2[1,], ylim = c(0,.5), type = "l", xlab = "RIAS signal strength (dBuV/m)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])), col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



############################################################
## model 3 --- RIAS and distance to Berlin; no fixed effects
############################################################
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,5:6] <- hl(temp[1:42,1:2])


## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,5] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,5] <- round(1 - pchisq(W, df = nrow(Q)),3)



##################################################
## simulate effect of RIAS signal based on model 3
##################################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$signal), to = max(dat$signal), length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"signal"] <- d[m]
	X.pred[,"I(signal^2)"] <- d[m]^2
	X.pred[,"I(signal^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))

							}

	cat(m,"\n")

						}


out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot3.pdf")
plot(d, out2[1,], ylim = c(0,.6), type = "l", xlab = "RIAS signal strength (dBuV/m)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])), col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



#############################################
## simulate effect of distance to East Berlin
#############################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$distance_to_berlin), to = max(dat$distance_to_berlin),
			length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"distance_to_berlin"] <- d[m]
	X.pred[,"I(distance_to_berlin^2)"] <- d[m]^2
	X.pred[,"I(distance_to_berlin^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))


							}

	cat(m,"\n")

						}


out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot3dist.pdf")
plot(d, out2[1,], ylim = c(0,.7), type = "l", xlab = "Distance to East Berlin (km)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])),
col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



#########################################################
## model 4 --- RIAS and distance to Berlin; fixed effects
#########################################################						
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,7:8] <- hl(temp[1:42,1:2])



## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,7] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,7] <- round(1 - pchisq(W, df = nrow(Q)),3)



##################################################
## simulate effect of RIAS signal based on model 4
##################################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$signal), to = max(dat$signal), length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"signal"] <- d[m]
	X.pred[,"I(signal^2)"] <- d[m]^2
	X.pred[,"I(signal^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))

							}

	cat(m,"\n")

						}


out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot4.pdf")
plot(d, out2[1,], ylim = c(0,.6), type = "l", xlab = "RIAS signal strength (dBuV/m)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])), col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



#############################################
## simulate effect of distance to East Berlin
#############################################
library(MASS)
X.pred <- model.matrix(probit.out)
d <- seq(from = min(dat$distance_to_berlin), to = max(dat$distance_to_berlin),
			length.out = 20)
sims <- 10000
out <- matrix(NA,length(d),sims)
rownames(out) <- d


set.seed(123)
m <- 1; n <- 1
for(m in 1:length(d))	{

	X.pred[,"distance_to_berlin"] <- d[m]
	X.pred[,"I(distance_to_berlin^2)"] <- d[m]^2
	X.pred[,"I(distance_to_berlin^3)"] <- d[m]^3

		for(n in 1:sims)	{

			gamma.tilde <- mvrnorm(n = 1, mu = probit.out$coef, Sigma = vcov.cl)
			out[m,n] <- mean(pnorm(X.pred %*% gamma.tilde))


							}

	cat(m,"\n")

						}


out2 <- matrix(NA,3,length(d))
out2[1,] <- apply(out,1,mean)
out2[2,] <- apply(out,1,sort)[(sims*.025)+1,]
out2[3,] <- apply(out,1,sort)[(sims*.975),]


pdf("plot4dist.pdf")
plot(d, out2[1,], ylim = c(0,.7), type = "l", xlab = "Distance to East Berlin (km)",
ylab = "Estimated probability of protest event", cex.lab = 1.2, cex.axis = 1.2, lwd = 2)

polygon(x = c(d, rev(d)), y = c(out2[2,], rev(out2[3,])),
col = "#66666666", border = NA)
lines(d, out2[1,], lwd = 2)
dev.off()



## output table1
latex(table1, file = "")



################
##  Placebo test
################
balance.out <- lm(signal ~	
							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +
							as.factor(size) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat)

summary(balance.out)
r <- residuals(balance.out)


##  MI 1
out.1 <- lm(r ~ turnout.m1 + invalid.m1 + SED.m1 + LDP.m1 + CDU.m1, dat = dat)
summary(out.1)

##  clustered standard errors
vcov.1 <- cluster.vcov(out.1, dat$county)
temp.1 <- coeftest(out.1, vcov.1)


##  MI 2
out.2 <- lm(r ~ turnout.m2 + invalid.m2 + SED.m2 + LDP.m2 + CDU.m2, dat = dat)
summary(out.2)

##  clustered standard errors
vcov.2 <- cluster.vcov(out.2, dat$county)
temp.2 <- coeftest(out.2, vcov.2)


##  MI 3
out.3 <- lm(r ~ turnout.m3 + invalid.m3 + SED.m3 + LDP.m3 + CDU.m3, dat = dat)
summary(out.3)

##  clustered standard errors
vcov.3 <- cluster.vcov(out.3, dat$county)
temp.3 <- coeftest(out.3, vcov.3)


##  MI 4
out.4 <- lm(r ~ turnout.m4 + invalid.m4 + SED.m4 + LDP.m4 + CDU.m4, dat = dat)
summary(out.4)

##  clustered standard errors
vcov.4 <- cluster.vcov(out.4, dat$county)
temp.4 <- coeftest(out.4, vcov.4)


##  MI 5
out.5 <- lm(r ~ turnout.m5 + invalid.m5 + SED.m5 + LDP.m5 + CDU.m5, dat = dat)
summary(out.5)

##  clustered standard errors
vcov.5 <- cluster.vcov(out.5, dat$county)
temp.5 <- coeftest(out.5, vcov.5)



####################################
## combine estimates and compute SEs
####################################
est.mi <- cbind(	temp.1[,1],
					temp.2[,1],
					temp.3[,1],
					temp.4[,1],
					temp.5[,1]
					)

se.mi <- cbind(		temp.1[,2],
					temp.2[,2],
					temp.3[,2],
					temp.4[,2],
					temp.5[,2]
					)

## using Rubin's notation (2002: 87)
theta.bar <- apply(est.mi,1,mean)
W.bar <- apply(se.mi,1,mean)
B <- (est.mi - replicate(5,theta.bar))^2
B <- apply(B,1,sum)/4
Tot <- W.bar + B*6/5

v <- 4 * (1 + 1/6*W.bar/B)^2

##  individual coefficients
cbind(theta.bar, sqrt(Tot))


## Wald test RIAS
sel <- 2:6

w1 <- out.1$coef[sel] %*% solve(vcov.1[sel,sel]) %*% out.1$coef[sel]
w2 <- out.2$coef[sel] %*% solve(vcov.2[sel,sel]) %*% out.2$coef[sel]
w3 <- out.3$coef[sel] %*% solve(vcov.3[sel,sel]) %*% out.3$coef[sel]
w4 <- out.4$coef[sel] %*% solve(vcov.4[sel,sel]) %*% out.4$coef[sel]
w5 <- out.5$coef[sel] %*% solve(vcov.5[sel,sel]) %*% out.5$coef[sel]

w.mean <- mean(c(w1,w2,w3,w4,w5))
w.mean.sq <- mean(c(sqrt(w1),sqrt(w2),sqrt(w3),sqrt(w4),sqrt(w5)))

temp <- sum(c(
			(sqrt(w1) - w.mean.sq)^2,
			(sqrt(w2) - w.mean.sq)^2,
			(sqrt(w3) - w.mean.sq)^2,
			(sqrt(w4) - w.mean.sq)^2,
			(sqrt(w5) - w.mean.sq)^2
			))

r <- 1.2 * 1/4 * temp
W <- (w.mean/3 - 6/4*r)/(1+r)
v <- 3^(-3/5) * 4 * (1+1/r)^2
round(1 - pf(W,3,v),3)



#########################################################
##  interaction between RIAS signal strength and distance
#########################################################	
##  I'm only entering RIAS signal strength linearly here to avoid including
##  a large number of interaction terms	
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal *
							(distance_to_berlin +
							I(distance_to_berlin^2) + 
							I(distance_to_berlin^3)) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

## Wald test for the interaction
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal:distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["signal:I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["signal:I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)
theta <- probit.out$coef
W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
round(1 - pchisq(W, df = nrow(Q)),3)



#####################################################################
##  Robustness check omitting covariates and using fixed effects only
#####################################################################
table1 <- matrix(NA,8,6)
rownames(table1) <- c(	"intercept",
						"RIAS signal strength",
						"RIAS signal strength$^2$",
						"RIAS signal strength$^3$",
						"Wald test RIAS $p$-value",
						"district fixed effects",
						"county fixed effects",
						"municipality size fixed effects")

table1[6,c(1,3,5)] <- c("Yes","No","No")
table1[7,c(1,3,5)] <- c("No","Yes","No")
table1[8,c(1,3,5)] <- c("No","No","Yes")


probit.out <- glm(protest ~ signal + I(signal^2) + I(signal^3)+

							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:4,1:2] <- hl(temp[1:4,1:2])


## Wald test
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[5,1] <- round(1 - pchisq(W, df = nrow(Q)),3)



probit.out <- glm(protest ~ signal + I(signal^2) + I(signal^3)+

							as.factor(county),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:4,3:4] <- hl(temp[1:4,1:2])

## Wald test
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[5,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


probit.out <- glm(protest ~ signal + I(signal^2) + I(signal^3)+

							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)

## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:4,5:6] <- hl(temp[1:4,1:2])

## Wald test
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[5,5] <- round(1 - pchisq(W, df = nrow(Q)),3)

latex(table1, file = "")





